*****************************************************************
* Replication directory for                                   ***
* Prime locations                                             ***
* by Gabriel M. Ahlfeldt, Thilo N.H. Albers, Kristian Behrens ***
* Published in American Economic Review: Insights             ***
*****************************************************************
* 01/2025
* Stata
version 17.0


* This do files uses cluster generated in C++ (lines 2-17 of Algorithm 1)  to 
* complete the deliniation of prime locations (lines 18-34 of Algorithm 1)
* This do file is run for various employment types (defined by gloabl $ET)

* Create temporary space for output
	capture mkdir "$temp/grid_PL_output_ET${ET}"	
	
* Loop over CBSAs
	qui u "$data_USMETROS/Raw Numeric Data/METRO LIST/METROS.dta", clear	
	qui tab cbsafp
	local Ncbsafp = r(N) // Total number of cities in list
	levelsof  cbsafp, local(USMETROIDS)	
	local count = 1
	* Begin loop by metro 
			foreach USmid of local USMETROIDS{	
			di "...Working on CBSA `USmid', number `count' of `Ncbsafp'..."
			u "$dataoutput/USMetroGridEmp/EmpType_${ET}/EmpGrid_`USmid'", clear	// Using baseline clusters (total emp, standard p-value)
		
			* Fix sepcial cases ************************************************
				 qui sum clusterID if clusterID != . // If there are no clusters
					local MaxCluster = r(max) 
					qui sum grid_x 
					local mingridX = round(r(min),0)
					qui sum grid_y if grid_x ==  `mingridX'
					local mingridY = round(r(min),0)
					if `MaxCluster' == 0 {
						 qui replace clusterID = 1 if grid_x == `mingridX' & grid_y == `mingridY'
						 qui replace developable = 1 if grid_x == `mingridX' & grid_y == `mingridY'
						 qui replace cell_total_emp = 999 if grid_x == `mingridX' & grid_y == `mingridY'
					}
					else {
						}
				qui sum clusterID if clusterID != .  // If there are too many clusters
					local MaxCluster = r(max) 
					qui sum grid_x 
					local mingridX = round(r(min),0)
					qui sum grid_y if grid_x ==  `mingridX'
					local mingridY = round(r(min),0)
					if `MaxCluster' > 50 {
						 qui replace clusterID = 0
						 qui replace clusterID = 1 if grid_x == `mingridX' & grid_y == `mingridY'
						 qui replace developable = 1 if grid_x == `mingridX' & grid_y == `mingridY'
						 qui replace cell_total_emp = 999 if grid_x == `mingridX' & grid_y == `mingridY'
					}
					else {
						}
			
			
			* DATA PREP ********************************************************
				qui ren cell_total_emp emp
			* Cluster employment relative to max PL employment in city
				qui gen TrimmedCluster = clusterID > 0							// Trimmed cluster variable
				qui egen Cemp = sum(emp), by(metro_id clusterID)
				qui egen Carea = count(emp), by(metro_id clusterID)
				qui gen Cempdens = 	Cemp  / Carea	
				qui egen CempMax = max(Cemp) if clusterID != 0, by(metro_id)	// Employment of largest cluster in metro
				qui gen CempRel = Cemp/CempMax 									// Cluster employment relative to largest cluster in metro
				qui replace TrimmedCluster = 0 if CempRel < $PLtresh			// Restrict to clusters with minimum relative employment

			* Ceneterate trimmed cluster variables	
				qui gen TrimmedClusterID = clusterID*TrimmedCluster if clusterID > 0 // Trimmed clusters identified
				qui egen TrimmedClusterX = mean(grid_x) if TrimmedCluster > 0, by(TrimmedClusterID metro_id)
				qui egen TrimmedClusterY = mean(grid_y) if TrimmedCluster > 0, by(TrimmedClusterID metro_id)		
	
			* AGGREGATION ******************************************************
				qui sum metro_id
				global MetroMin=r(min)
				global MetroMax=r(max)
				foreach num of numlist $MetroMin / $MetroMax { 					// This programme can process several metros. 
				local O = 999
				local i = 1
				while `O' > 0 {
				display "...round `i' of city `num'..."
				qui gen TrimmedClusterIDold = TrimmedClusterID 					// save old ID
				AGGCLUSTER2 `num' $DISTtresh
				qui gen test = abs(TrimmedClusterIDold-TrimmedClusterID) 		// generate objective
				qui sum test
				local O = r(sum)
				qui drop TrimmedClusterIDold test
				local i = `i'+1
				}
				}
				* Generate PL ID
				capture gen PLID = .											// gen PL ID variable
				qui replace PLID = TrimmedClusterID 

			* GENERATING CONVEX HULL *******************************************
				foreach num of numlist $MetroMin / $MetroMax { 					// loop over metros
				levelsof PLID if metro_id == `num', local(levels) 				// get list of PL IDs in a metro
				foreach pl of numlist `levels' {								// loop over PL IDs
					CH `num' `pl' 2
				}																// PL loop closes
				}																// metro loop closes
				
				* Clean temp folder 
				preserve 
					filelist, dir("$temp/PLtemp")
					levelsof filename, local(files)
					qui foreach f of local files{
						erase "$temp/PLtemp/`f'"
					}
				restore 
				
			* SPLIT PLs if parts are non-contiguous ****************************
				SPLITTER $SplitDist	
				
			* Generate PL variables	gen PL = PLID>0 & PLID != .
				qui gen PL = PLID>0 & PLID != .
				qui gen PL_x = grid_x if PL == 1
				qui gen PL_y = grid_y if PL == 1
				qui egen PL_emp = sum(emp) if PL == 1,by(metro_id PLID)
				qui egen PL_area = sum(area_g) if PL == 1,by(metro_id PLID)
				qui gen PL_empdens = PL_emp / PL_area
				qui egen PLX = mean(grid_x) if PLID > 0 & PLID != ., by(PLID metro_id)
				qui egen PLY = mean(grid_y) if PLID > 0 & PLID != ., by(PLID metro_id)		
				qui egen metro_emp = sum(emp) ,by(metro_id )

			* Flag as pseudo-PLs an drop unless the only one in city ***********************
				qui gen PPL = PL_emp < 10000  									// If PL has not enough employment
				preserve // Compute rank
				qui keep PL PPL PLID cbsafp PL_emp
					qui drop if PL == 0
					qui duplicates drop
					qui egen PLrankByCity = rank(PL_emp), by(cbsafp) field
					qui save "$temp/PLID_metro_rank_temp.dta", replace
				restore
				qui merge m:1 PLID cbsafp using "$temp/PLID_metro_rank_temp.dta", keepusing(PLrankByCity)	
				qui drop _m
				qui erase "$temp/PLID_metro_rank_temp.dta"
				qui  foreach var of varlist PL* {								// remove pseudo PLs, except if they are the larges PPL in the metro
					qui replace `var' = . if PPL == 1 & PLrankByCity != 1
				}
				qui replace PL = 0 if PL == .
				qui drop PL_emp // Now need to update PL_emp
				qui egen PL_emp = sum(emp) if PL == 1, by(cbsafp PLID)
				
			* Generate TS employment share within and outside PLs
				qui egen TSemp_insidePL = sum(cell_TS_emp) if PL == 1, by(cbsafp)
				qui egen TSemp_outsidePL = sum(cell_TS_emp) if PL == 0, by(cbsafp)
				foreach name in insidePL outsidePL {
					egen temp = mean(TSemp_`name'), by(cbsafp)
					drop TSemp_`name'
					ren temp TSemp_`name'
				}
				qui gen TSempshare_insidePL = TSemp_insidePL / TSemp_insidePL * 100
					label var TSempshare_insidePL "TS share within PLs (%)"
				qui gen TSempshare_outsidePL = TSemp_outsidePL / (metro_emp-TSemp_insidePL) *100
					label var TSempshare_outsidePL "TS share outside PLs (%)"
			* MAP PL
				local title = metro_name[1]
				twoway  	(scatter grid_y grid_x , msymbol(s) mlcolor(none) msize(tiny) mcolor(black*0.25)) ///
						(scatter grid_y grid_x if developable == 1, msymbol(s) mlcolor(none) msize(tiny) mcolor(black*0.4)) ///
						(scatter grid_y grid_x if emp > 0 , msymbol(s) mlcolor(none) msize(tiny) mcolor(black*0.75)) ///
						(scatter grid_y grid_x if  PL == 1 , msize(tiny)  mlcolor(none)  msymbol(s) mcolor(red*0.5)) ///
						(scatter PLY PLX , msize(tiny) msymbol(s)  mlcolor(none)  mcolor(red) mlabel(PLrankByCity) mlabcolor(red)) ///
						, legend(off) xsize(5) ysize(5) xlabel(0[10000]70000)		ylabel(0[10000]70000) graphregion(color(white))	title("`title'")
				capture mkdir "$temp/figures_App"
				capture mkdir "$temp/figures_App/US-CBSAs"
				capture mkdir "$temp/figures_App/US-CBSAs/ET${ET}"
				capture mkdir "$temp/figures_App/US-CBSAs/ET${ET}/MAPS"
			* Graph simple figure, for quick inspection only, the figures shared via the toolkit will be generated later	
				qui graph export "$temp/figures_App/US-CBSAs/ET${ET}/MAPS/MAPS_PL_TotalEmp_BaseP_`USmid'.png", replace height(1000) width(1000)
		
			
			* Save outputs *****************************************************
				qui drop TrimmedCluster Cemp Carea Cempdens CempMax CempRel TrimmedClusterID TrimmedClusterX TrimmedCluster
				qui save "$temp/grid_PL_output_ET${ET}/grid_PL_output_ET${ET}_`USmid'.dta", replace
				qui keep if PL == 1
				qui save "$temp/grid_PL_output_ET${ET}/PL_grids_ET${ET}_`USmid'", replace
			*Reset counter
				local `count' = `count' +1
			}
	* Loop ends
	
* Append outputs of loops into metro-grid files
		display "...compiling employment metro-grid data..."
		display `USMETROIDS'
		clear
		foreach USmid of local USMETROIDS{
			qui append using "$temp/grid_PL_output_ET${ET}/grid_PL_output_ET${ET}_`USmid'.dta"
			}	
			qui save "$temp/grid_PL_output_ET${ET}/grid_PL_output_ET${ET}__all.dta", replace
		clear 
		foreach USmid of local USMETROIDS{
			qui append using "$temp/grid_PL_output_ET${ET}/PL_grids_ET${ET}_`USmid'"
			}	
			qui keep if PL == 1
			qui save "$temp/grid_PL_output_ET${ET}/PL_grids_ET${ET}__all.dta", replace

* Script ends